Install the tidyverse
This week: visualization, transformation, exploration
Rectangular data: collections of values that are each associated with a variable and an observation.
Focus on data exploration and hypothesis generation.
cran - ggplot2, pkg - ggplot2, tidyverse - ggplot2, cheatsheet - ggplot2
cran - dplyr, pkg - dplyr, tutorial - dplyr, tidyverse - dplyr, cheatsheet - dplyr
We’re using the mpg dataset that comes with ggplot2.
Use data() to review available datasets and ?mpg to access the doc for the mpg dataset.
## Warning: replacing previous import by 'tidyr::%>%' when loading 'broom'
## Warning: replacing previous import by 'tidyr::gather' when loading 'broom'
## Warning: replacing previous import by 'tidyr::spread' when loading 'broom'
The mpg dataset describes 234 cars using 11 variables:
manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class
A small sample (note the table produced with knitr::kable())
# Print five sample rows as a table; index 100 is listed twice, so that row repeats.
kable(x = mpg[c(2, 30, 50, 100, 100), ])
| manufacturer | model | displ | year | cyl | trans | drv | cty | hwy | fl | class |
|---|---|---|---|---|---|---|---|---|---|---|
| audi | a4 | 1.8 | 1999 | 4 | manual(m5) | f | 21 | 29 | p | compact |
| chevrolet | k1500 tahoe 4wd | 5.3 | 2008 | 8 | auto(l4) | 4 | 11 | 14 | e | suv |
| dodge | dakota pickup 4wd | 3.7 | 2008 | 6 | auto(l4) | 4 | 14 | 18 | r | pickup |
| honda | civic | 1.6 | 1999 | 4 | manual(m5) | f | 28 | 33 | r | subcompact |
| honda | civic | 1.6 | 1999 | 4 | manual(m5) | f | 28 | 33 | r | subcompact |
ggplot(data = <DATA>) +
  <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
Add layers to it with GEOM_FUNCTIONS.
Each geom takes a mapping argument, which uses aes() to map variables to the x and y axes.
Add additional dimensions by mapping color, shape, or size to other variables.
# Scatterplot of hwy against displ, built in a single expression.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))

# Keep the empty plot in `p` so that later examples only need to add a layer.
p <- ggplot(mpg)
p + geom_point(aes(x = cyl, y = hwy))
The gas mileage data has more dimensions than just hwy, cyl, and displ.
Let’s add class as a dimension in our plot by mapping class to color.
# class is mapped to colour inside aes(); size is a fixed setting outside it.
p +
  geom_point(aes(x = displ, y = hwy, color = class), size = 2)
ggplot has other mapping options: shape, size, alpha, ...
# Same plot, but with colour mapped to cyl instead of class.
p +
  geom_point(aes(x = displ, y = hwy, color = cyl), size = 2)
Problem: cyl is a continuous variable that takes only 4 values: 4, 5, 6, 8
# Recode cyl as a factor so that it is treated as a discrete variable.
mpg[["cyl"]] <- as.factor(mpg[["cyl"]])
# Rebuild the base plot so that it uses the modified data.
p <- ggplot(mpg)

# Colour by the now-discrete cyl.
p + geom_point(aes(x = displ, y = hwy, color = cyl))

# Fixed appearance: arguments given outside aes() apply to every point.
p + geom_point(aes(x = displ, y = hwy), color = "red", shape = 22, size = 1)

# One panel per class, arranged in two rows.
p +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~class, nrow = 2)
## Graphs that calculate new values to plot {.build}
smooth
Bar
Box
# Points with a smoothed trend drawn on top of them.
p +
  geom_point(aes(x = displ, y = hwy)) +
  geom_smooth(aes(x = displ, y = hwy))

# One smoothed line per drv value, distinguished by line type.
p + geom_smooth(aes(x = displ, y = hwy, linetype = drv))

# Bar chart of counts: the bar heights are computed by the geom, not read from a column.
ggplot(diamonds) +
  geom_bar(aes(x = cut))

# Bar chart of proportions: `..prop..` refers to a value computed by the stat, and
# group = 1 puts all rows in a single group so the proportions are of the whole data set.
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop.., group = 1))
# Build a small example data set: 4 species x 3 conditions with random values.
library(knitr)
library(kableExtra)
specie <- rep(c("sorgho", "poacee", "banana", "triticum"), each = 3)
condition <- rep(c("normal", "stress", "Nitrogen"), times = 4)
value <- abs(rnorm(n = 12, mean = 0, sd = 15))
data <- data.frame(specie, condition, value)

# Print numbers with two significant digits from here on.
options(digits = 2)

# Render the data as an HTML table and fix the width of the first two columns.
a <- kable(data, format = "html")
column_spec(a, column = 1:2, width = "1in")
| specie | condition | value |
|---|---|---|
| sorgho | normal | 13.6 |
| sorgho | stress | 13.9 |
| sorgho | Nitrogen | 58.3 |
| poacee | normal | 19.6 |
| poacee | stress | 1.3 |
| poacee | Nitrogen | 4.3 |
| banana | normal | 19.5 |
| banana | stress | 9.7 |
| banana | Nitrogen | 21.9 |
| triticum | normal | 9.9 |
| triticum | stress | 1.4 |
| triticum | Nitrogen | 14.2 |
# Grouped: the conditions of each specie are drawn side by side.
ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge")

# Stacked: the conditions are piled on top of each other (the default position).
ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "stack")

# Stacked Percent: every bar is rescaled to the same total height.
ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "fill")
library(RColorBrewer)
# Display the available palettes.
display.brewer.all(n = 10)
## Color Brewer 2
# Set1 offers at most 9 colours, so requesting 10 raises the warning shown below.
display.brewer.pal(n = 10, name = "Set1")
## Warning in display.brewer.pal(10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Displaying the palette you asked for with that many colors
# Color with RColorBrewer: take the fill colours from the "Set1" palette.
ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_brewer(palette = "Set1")

# Faceting: one panel per condition.
# The legend is switched off here; remove the theme() call to compare with and without it.
ggplot(data, aes(x = specie, y = value, color = specie, fill = specie)) +
  geom_bar(stat = "identity") +
  facet_wrap(~condition) +
  theme(legend.position = "none")
order of bars
# Example data: eight people, each with a city and that city's id.
Name <- c("Juan", "Michael", "Andrea", "Charles", "Jonás", "Juan", "Donata", "Flavia")
City <- c(
  "Madrid", "New York", "Madrid", "Liverpool",
  "Madrid", "Buenos Aires", "Rome", "Liverpool"
)
City.Id <- c(1, 2, 1, 3, 1, 4, 5, 3)
df <- data.frame(Name, City, City.Id)

# Number of rows per city, with the bars in the default order.
# The `text` aesthetic is not drawn by geom_bar(); presumably it feeds a plotly tooltip.
a <- ggplot(df, aes(x = City, text = paste("City.Id=", City.Id))) +
  geom_bar()
a
From:library(forcats)
# A character vector with repeated values.
vec <- c("a", "b", "c", "d", "a", "c", "d", "b", "d", "c", "d", "c", "a", "b")
vec
## [1] "a" "b" "c" "d" "a" "c" "d" "b" "d" "c" "d" "c" "a" "b"
# As a factor, the levels come out sorted alphabetically.
fvec <- factor(vec)
fvec
## [1] a b c d a c d b d c d c a b
## Levels: a b c d
# Levels ordered by first appearance in the data.
fct_inorder(fvec)
## [1] a b c d a c d b d c d c a b
## Levels: a b c d
# Levels ordered by how often each value occurs, most frequent first.
fct_infreq(fvec)
## [1] a b c d a c d b d c d c a b
## Levels: c d a b
order of bars
# Example data: eight people, each with a city and that city's id.
Name <- c("Juan", "Michael", "Andrea", "Charles", "Jonás", "Juan", "Donata", "Flavia")
City <- c(
  "Madrid", "Atlanta", "Madrid", "Liverpool",
  "Madrid", "Buenos Aires", "Rome", "Liverpool"
)
City.Id <- c(1, 2, 1, 3, 1, 4, 5, 3)
df <- data.frame(Name, City, City.Id)

# Bars in the default order of City.
a <- ggplot(df, aes(x = City, text = paste("City.Id=", City.Id))) +
  geom_bar()
a
# Same example data as before, rebuilt so that this example runs on its own.
Name <- c("Juan", "Michael", "Andrea", "Charles", "Jonás", "Juan", "Donata", "Flavia")
City <- c(
  "Madrid", "Atlanta", "Madrid", "Liverpool",
  "Madrid", "Buenos Aires", "Rome", "Liverpool"
)
City.Id <- c(1, 2, 1, 3, 1, 4, 5, 3)
df <- data.frame(Name, City, City.Id)

# fct_infreq() reorders the cities by frequency, so the tallest bar comes first.
a <- ggplot(df, aes(x = fct_infreq(City), text = paste("City.Id=", City.Id))) +
  geom_bar()
a
library(plotly)
# Convert the ggplot object stored in `a` into an interactive plotly widget.
ggplotly(p = a)
## Warning: replacing previous import by 'shiny::validateCssUnit' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::br' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::tags' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::div' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::h1' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::h2' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::h3' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::h4' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::h5' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::h6' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::knit_print.html' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::tagSetChildren' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::includeScript' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::em' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::tagAppendChild' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::is.singleton' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::includeHTML' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::includeMarkdown' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::code' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::tagList' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::a' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::tagAppendAttributes' when
## loading 'crosstalk'
## Warning: replacing previous import by 'shiny::singleton' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::hr' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::p' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::suppressDependencies' when
## loading 'crosstalk'
## Warning: replacing previous import by 'shiny::tagAppendChildren' when
## loading 'crosstalk'
## Warning: replacing previous import by 'shiny::includeText' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::pre' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::span' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::withTags' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::htmlTemplate' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::img' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::tag' when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::includeCSS' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::knit_print.shiny.tag' when
## loading 'crosstalk'
## Warning: replacing previous import by 'shiny::knit_print.shiny.tag.list'
## when loading 'crosstalk'
## Warning: replacing previous import by 'shiny::strong' when loading
## 'crosstalk'
## Warning: replacing previous import by 'shiny::HTML' when loading
## 'crosstalk'
# Interactive scatterplot of hwy against displ, coloured by cyl, with one point
# shape per manufacturer.
mpg$cyl <- as.factor(mpg$cyl)
p <- ggplot(data = mpg)
# ggplot2's default shape palette handles at most 6 discrete values and drops the
# points of every further level with a warning. mpg has more manufacturers than
# that, so one shape code per manufacturer is supplied explicitly.
a <- p +
  geom_point(mapping = aes(x = displ, y = hwy, color = cyl, shape = manufacturer)) +
  scale_shape_manual(values = seq_along(unique(mpg$manufacturer))) +
  theme(legend.position = "none")
# The legend is already hidden by theme(legend.position = "none") above.
# NOTE(review): the former `showlegend = FALSE` is not a documented ggplotly()
# argument and appears to be silently ignored, so it is no longer passed.
ggplotly(a)
library(forcats)
# Create data: eight people, each with a city and that city's id.
Name <- c("Juan", "Michael", "Andrea", "Charles", "Jonás", "Juan", "Donata", "Flavia")
City <- c(
  "Madrid", "New York", "Madrid", "Liverpool", "Madrid", "Buenos Aires", "Rome", "Liverpool"
)
City.Id <- c(1, 2, 1, 3, 1, 4, 5, 3)
# Columns are named after the vectors they are built from.
df <- data.frame(Name, City, City.Id)
# libraries
library(gridExtra)

# Build four simple plots from mtcars.
g1 <- ggplot(mtcars, aes(x = qsec)) +
  geom_density(fill = "slateblue")
g2 <- ggplot(mtcars, aes(x = drat, y = qsec, color = cyl)) +
  geom_point(size = 5) +
  theme(legend.position = "none")
g3 <- ggplot(mtcars, aes(x = factor(cyl), y = qsec, fill = cyl)) +
  geom_boxplot() +
  theme(legend.position = "none")
g4 <- ggplot(mtcars, aes(x = factor(cyl), fill = factor(cyl))) +
  geom_bar()

# Show the 4 plots on the same page, in a 2 x 2 grid.
grid.arrange(g1, g2, g3, g4, nrow = 2, ncol = 2)

# Other layouts
# g2 on top, with g3 and g4 side by side underneath.
grid.arrange(g2, arrangeGrob(g3, g4, ncol = 2), nrow = 2)
# Three plots stacked vertically.
grid.arrange(g1, g2, g3, nrow = 3)
# g2 on the left, with g3 and g4 side by side on the right.
grid.arrange(g2, arrangeGrob(g3, g4, ncol = 2), nrow = 1)
# g2 on the left, with g3 above g4 on the right.
grid.arrange(g2, arrangeGrob(g3, g4, nrow = 2), nrow = 1)